####################################################################### 
# Vladimir Chlouba, Daniel Smith & Seamus Wagner
# The Ohio State University, Department of Political Science
# Corresponding author: chlouba.1@osu.edu
#######################################################################
# Early Statehood and Support for Autocratic Rule
#######################################################################

# loading the required R packages
library(ggplot2)
library(foreign)
library(data.table)
library(stargazer)
library(lme4)
library(arm)
library(memisc)
library(vegan)
library(MASS)
library(nnet)
library(spikeSlabGAM)
library(speedglm)
library(biglm)
library(ordinal)
library(erer)
library(mnlogit)
library(mlogit)
library(scales)
library(nnet)
library(haven)
library(lfe)
library(GISTools)
library(rgdal)
library(sp)
library(raster)
library(maptools)
library(rgeos)
library(lattice)
library(spdep)
library(cshapes)
library(countrycode)
library(margins)
library(ggmap)
library(dplyr)
library(lmtest)
library(sandwich)
library(fixest)
library(ivmodel)
library(dotwhisker)
library(logisticPCA)
library(rethinking)

rm(list = ls())
#~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~

######################################################################
# Precolonial Centralization and Local Leader Selection (Figure 1)
######################################################################

# loading the dataset
Murdock <- read.csv("Murdock_replication.csv")

# producing a cross-tab of centralization and local leader selection
table(Murdock$centralization, Murdock$appointment)

#~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~

######################################################################
# Precolonial Centralization and Support for Autocratic Rule (Table 1)
######################################################################

# loading the dataset
afro.master <- read.csv("autocratic_rule_data_replication.csv")

# creating lists of variables
iv_list <- c("centralization", "TSI", "centralization*col_britain")


control_list_grid_pre <- "wateraccess+soil_quality+abslat+mnt2000+avgprec+prec2+avgtemp+meanrh+malaria_index+prop_tropics+ln_popd_murdock+coast+river" 
control_list_grid_post <- "+capdist+bdist1+conflict+lpop+loglightsavg+polity+land_area+slave_exports+intensive"
control_list_indiv_pre <- "+female+age"
control_list_indiv_post <- "+female+age+education+employed+urban"

dv_list <- c("big_man_ord", "centralization")


# model formulas
fmla1 <- as.formula(paste(paste(dv_list[1],"~"),
                           paste(iv_list[1]),
                           paste("| country + round | 0 | NAME")))

fmla2 <- as.formula(paste(paste(dv_list[1],"~"),
                           paste(iv_list[1], "+"),
                           paste(control_list_grid_pre),
                           paste("| country + round | 0 | NAME")))

fmla3 <- as.formula(paste(paste(dv_list[1],"~"),
                          paste(iv_list[1], "+"),
                          paste(control_list_grid_pre),
                          paste(control_list_grid_post),
                          paste("| country + round | 0 | NAME")))

fmla4 <- as.formula(paste(paste(dv_list[1],"~"),
                          paste(iv_list[1], "+"),
                          paste(control_list_grid_pre),
                          paste(control_list_grid_post),
                          paste(control_list_indiv_pre),
                          paste("| country + round | 0 | NAME")))

fmla5 <- as.formula(paste(paste(dv_list[1],"~"),
                          paste(iv_list[1], "+"),
                          paste(control_list_grid_pre),
                          paste(control_list_grid_post),
                          paste(control_list_indiv_pre),
                          paste(control_list_indiv_post),
                          paste("| country + round | 0 | NAME")))

# estimate coefficients
m1 <- felm(fmla1, data=afro.master)
m2 <- felm(fmla2, data=afro.master)
m3 <- felm(fmla3, data=afro.master)
m4 <- felm(fmla4, data=afro.master)
m5 <- felm(fmla5, data=afro.master)


# producing LaTeX code
stargazer(m1, m2, m3, m4, m5, no.space = T, digits = 2)


#~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~

#################################################################################
# Precolonial Centralization and Support for Autocratic Rule (IV-2SLS), (Table 2)
#################################################################################

# creating AB round indicator variables to ensure compatibility with ivmodel 
afro.master$R6 <- 0
afro.master$R6[afro.master$round==6] <- 1

afro.master$R5 <- 0
afro.master$R5[afro.master$round==5] <- 1

afro.master$R4 <- 0
afro.master$R4[afro.master$round==4] <- 1

afro.master$R3 <- 0
afro.master$R3[afro.master$round==3] <- 1


# preparing lists of covariates
one <- c("country", "R6", "R5", "R4", "R3")

two <- c("wateraccess", "soil_quality", "abslat", "mnt2000", "avgprec", "prec2", "avgtemp", 
         "meanrh", "malaria_index", "prop_tropics", "ln_popd_murdock", "coast", "river","country", 
         "R6", "R5", "R4", "R3")

three <- c("wateraccess", "soil_quality", "abslat", "mnt2000", "avgprec", "prec2", "avgtemp", 
           "meanrh", "malaria_index", "prop_tropics", "ln_popd_murdock", "coast", "river",
           "capdist", "bdist1", "conflict", "lpop", "loglightsavg", "polity", "land_area",
           "intensive", "slave_exports", "country", "R6", "R5", "R4", "R3")

four <- c("wateraccess", "soil_quality", "abslat", "mnt2000", "avgprec", "prec2", "avgtemp", 
          "meanrh", "malaria_index", "prop_tropics", "ln_popd_murdock", "coast", "river",
          "capdist", "bdist1", "conflict", "lpop", "loglightsavg", "polity", "land_area",
          "intensive", "slave_exports", "female", "age","country", "R6", "R5", "R4", 
          "R3")

five <- c("wateraccess", "soil_quality", "abslat", "mnt2000", "avgprec", "prec2", "avgtemp", 
          "meanrh", "malaria_index", "prop_tropics", "ln_popd_murdock", "coast", "river",
          "capdist", "bdist1", "conflict", "lpop", "loglightsavg", "polity", "land_area",
          "intensive", "slave_exports", "female", "age","education", "employed", 
         "urban", "country", "R6", "R5", "R4", "R3")


# subsetting lists of covariates
first <- afro.master[, one]
second <- afro.master[, two]
third <- afro.master[, three]
fourth <- afro.master[, four]
fifth <- afro.master[, five]

# defining other variables
instrument <- afro.master[, "TSI"]
cluster <- afro.master[, "NAME"]
DV <- afro.master[, "big_man_ord"]
central <- afro.master[, "centralization"]


# model 1
iv.1 <- ivmodel(Y = DV, X = first, Z = instrument, D = central, na.action = na.omit, 
                heteroSE = T, clusterID = cluster)
summary(iv.1)


# model 2
iv.2 <- ivmodel(Y = DV, X = second, Z = instrument, D = central, na.action = na.omit, 
                heteroSE = T, clusterID = cluster)
summary(iv.2)


# model 3
iv.3 <- ivmodel(Y = DV, X = third, Z = instrument, D = central, na.action = na.omit, 
                heteroSE = T, clusterID = cluster)
summary(iv.3)


# model 4
iv.4 <- ivmodel(Y = DV, X = fourth, Z = instrument, D = central, na.action = na.omit, 
                heteroSE = T, clusterID = cluster)
summary(iv.4)


# model 5
iv.5 <- ivmodel(Y = DV, X = fifth, Z = instrument, D = central, na.action = na.omit, 
                heteroSE = T, clusterID = cluster)
summary(iv.5)


# producing first-stage estimates

# model formulas
fmla1 <- as.formula(paste(paste(dv_list[2],"~"),
                          paste(iv_list[2]),
                          paste("| country + round | 0 | NAME")))

fmla2 <- as.formula(paste(paste(dv_list[2],"~"),
                          paste(iv_list[2], "+"),
                          paste(control_list_grid_pre),
                          paste("| country + round | 0 | NAME")))

fmla3 <- as.formula(paste(paste(dv_list[2],"~"),
                          paste(iv_list[2], "+"),
                          paste(control_list_grid_pre),
                          paste(control_list_grid_post),
                          paste("| country + round | 0 | NAME")))


fmla4 <- as.formula(paste(paste(dv_list[2],"~"),
                          paste(iv_list[2], "+"),
                          paste(control_list_grid_pre),
                          paste(control_list_grid_post),
                          paste(control_list_indiv_pre),
                          paste("| country + round | 0 | NAME")))

fmla5 <- as.formula(paste(paste(dv_list[2],"~"),
                          paste(iv_list[2], "+"),
                          paste(control_list_grid_pre),
                          paste(control_list_grid_post),
                          paste(control_list_indiv_pre),
                          paste(control_list_indiv_post),
                          paste("| country + round | 0 | NAME")))

# estimate coefficients
m1 <- felm(fmla1, data=afro.master)
m2 <- felm(fmla2, data=afro.master)
m3 <- felm(fmla3, data=afro.master)
m4 <- felm(fmla4, data=afro.master)
m5 <- felm(fmla5, data=afro.master)


# summaries
summary(m1)
summary(m2)
summary(m3)
summary(m4)
summary(m5)


#~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~

#################################################################
# Generationally Transmitted Norms as a Mechanism (Figure 3)
#################################################################

# different colonial rulers
British <- subset(afro.master, afro.master$col_britain==1)

French <- subset(afro.master, afro.master$col_france==1)

# model formulas
fmla1 <- as.formula(paste(paste(dv_list[1],"~"),
                          paste(iv_list[1]),
                          paste("| round + country | 0 | NAME")))



fmla2 <- as.formula(paste(paste(dv_list[1],"~"),
                          paste(iv_list[1], "+"),
                          paste(control_list_grid_pre),
                          paste(control_list_grid_post),
                          paste(control_list_indiv_pre),
                          paste(control_list_indiv_post),
                          paste("| round + country | 0 | NAME")))

# estimate coefficients for French colonies
m1 <- felm(fmla1, data=French)
m2 <- felm(fmla2, data=French)


# estimate coefficients for British colonies
m3 <- felm(fmla1, data=British)
m4 <- felm(fmla2, data=British)


# different degrees of contact with traditional leaders
no.contact <- subset(afro.master, afro.master$contact_trad==0)

contact <- subset(afro.master, afro.master$contact_trad==1)


# estimate coefficients for people with no contact
m5 <- felm(fmla1, data=no.contact)
m6 <- felm(fmla2, data=no.contact)

# estimate coefficients for people with contact
m7 <- felm(fmla1, data=contact)
m8 <- felm(fmla2, data=contact)



# comparing residents and non-residents of "ancestral" areas
nonresidents <- subset(afro.master, afro.master$resident==0)

residents <- subset(afro.master, afro.master$resident==1)


# estimate coefficients for nonresidents
m9 <- felm(fmla1, data=nonresidents)
m10 <- felm(fmla2, data=nonresidents)

# estimate coefficients for residents
m11 <- felm(fmla1, data=residents)
m12 <- felm(fmla2, data=residents)


# visualization via a coefficient plot (Figure 3)
term <- c("French colonies", "French colonies", "British colonies", "British colonies", 
          "No contact", "No contact", "Contact", "Contact", "Non-residents", "Non-residents",
          "Residents", "Residents")

estimate <- c(m1$coefficients[1], m2$coefficients[1], m3$coefficients[1], m4$coefficients[1], 
              m5$coefficients[1], m6$coefficients[1], m7$coefficients[1], m8$coefficients[1], 
              m9$coefficients[1], m10$coefficients[1], m11$coefficients[1], m12$coefficients[1])

std.error <- c(m1$cse[1], m2$cse[1], m3$cse[1], m4$cse[1], m5$cse[1], m6$cse[1], m7$cse[1],
               m8$cse[1], m9$cse[1], m10$cse[1], m11$cse[1], m12$cse[1])

model <- c("Model 1", "Model 2", "Model 1", "Model 2", "Model 1", "Model 2", "Model 1", "Model 2",
           "Model 1", "Model 2", "Model 1", "Model 2")

data <- as.data.frame(cbind(term, estimate, std.error, model))

data$term <- as.character(data$term)
data$estimate <- as.numeric(data$estimate)
data$std.error <- as.numeric(std.error)

dwplot(data, color = model, dot_args = (list(size = 2, pch = c(19, 15,
                                                               19, 15,
                                                               19, 15,
                                                               19, 15,
                                                               19, 15, 
                                                               19, 15))), 
       whisker_args =  list(lwd = 1)) +
  labs(x = "Coefficient", 
       y = "Specification") +
  scale_color_manual(name = "Models", values = c("Model 1" = "red", "Model 2" = "blue"), 
                     labels = c("Model 1" = "FEs only", "Model 2" = "FEs + controls")) +
  theme_classic(base_size = 13 ) +
  geom_vline(xintercept = 0, colour = "black", linetype = 3)
 

#~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~

#####################################################################################
# Precolonial Centralization and Support for Autocratic Rule in West Africa (Table 3)
#####################################################################################

# loading the dataset
West.Africa <- read.csv("West_Africa_replication.csv")
West.Africa <- West.Africa %>%
  dplyr::rename(NAME = �..NAME)
# model formulas 
fmla1 <- as.formula(paste(paste(dv_list[1],"~"),
                          paste(iv_list[3]),
                          paste("| round + country + NAME | 0 | NAME")))

fmla2 <- as.formula(paste(paste(dv_list[1],"~"),
                          paste(iv_list[3], "+"),
                          paste(control_list_grid_pre),
                          paste("| round + country + NAME | 0 | NAME")))

fmla3 <- as.formula(paste(paste(dv_list[1],"~"),
                          paste(iv_list[3], "+"),
                          paste(control_list_grid_pre),
                          paste(control_list_grid_post),
                          paste("| round + country + NAME | 0 | NAME")))

fmla4 <- as.formula(paste(paste(dv_list[1],"~"),
                          paste(iv_list[3], "+"),
                          paste(control_list_grid_pre),
                          paste(control_list_grid_post),
                          paste(control_list_indiv_pre),
                          paste("| round + country + NAME | 0 | NAME")))

fmla5 <- as.formula(paste(paste(dv_list[1],"~"),
                          paste(iv_list[3], "+"),
                          paste(control_list_grid_pre),
                          paste(control_list_grid_post),
                          paste(control_list_indiv_pre),
                          paste(control_list_indiv_post),
                          paste("| round + country + NAME | 0 | NAME")))


# estimate coefficients
m1 <- felm(fmla1, data=West.Africa)
m2 <- felm(fmla2, data=West.Africa)
m3 <- felm(fmla3, data=West.Africa)
m4 <- felm(fmla4, data=West.Africa)
m5 <- felm(fmla5, data=West.Africa)


# producing LaTeX code
stargazer(m1, m2, m3, m4, m5, no.space = T, digits = 2)

#~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~^~
